# Environment consumed by the vLLM process; exported so that any child
# process started from this shell inherits the values.
#   VLLM_ENGINE_ITERATION_TIMEOUT_S - per-iteration engine timeout; the _S
#                                     suffix indicates seconds (36000 = 10 h).
#   VLLM_RPC_TIMEOUT                - RPC wait timeout; presumably in
#                                     milliseconds (36000000 ms = 10 h) --
#                                     confirm against the vLLM version in use.
#   VLLM_ENFORCE_CUDA_GRAPH         - NOTE(review): not an upstream vLLM
#   VLLM_W8A8_MOE_USE_W4A8            variable as far as can be told from
#                                     here; likely specific to the vLLM
#                                     build deployed on this host. Verify.
export \
  VLLM_ENGINE_ITERATION_TIMEOUT_S=36000 \
  VLLM_RPC_TIMEOUT=36000000 \
  VLLM_ENFORCE_CUDA_GRAPH=1 \
  VLLM_W8A8_MOE_USE_W4A8=1

# Launch the vLLM OpenAI-compatible server for the packed int4
# Qwen3-Coder-480B-A35B-Instruct checkpoint.
#   --served-model-name  model name clients use in API requests
#   -tp 4 -pp 4          tensor-parallel size 4 x pipeline-parallel size 4
#                        (presumably 16 accelerators in total -- confirm
#                        against the cluster layout)
#   --max-model-len      180 * 1024 = 184320 tokens of context
#   --host/--port        listen on all interfaces, port 8000
# Long options use the hyphenated spelling documented by the vLLM CLI.
# The arithmetic uses POSIX $(( )) rather than the deprecated $[ ] form.
vllm serve /home/weights_68/Qwen/Qwen3-Coder-480B-A35B-Instruct-int4-pack8 \
  --served-model-name "qwen3-coder-480" \
  -tp 4 -pp 4 \
  --enable-chunked-prefill \
  --enable-prefix-caching \
  --disable-log-stats \
  --disable-log-requests \
  --max-model-len "$((180 * 1024))" \
  --host 0.0.0.0 --port 8000
